//#ifndef ORDERKV_BLOOM_FILTER_HPP
//#define ORDERKV_BLOOM_FILTER_HPP
//
//#endif //ORDERKV_BLOOM_FILTER_HPP

/*
 *********************************************************************
 *                                                                   *
 *                           Open Bloom Filter                       *
 *                                                                   *
 * Author: Arash Partow - 2000                                       *
 * URL: http://www.partow.net                                        *
 * URL: http://www.partow.net/programming/hashfunctions/index.html   *
 *                                                                   *
 * Copyright notice:                                                 *
 * Free use of the Open Bloom Filter Library is permitted under the  *
 * guidelines and in accordance with the MIT License.                *
 * http://www.opensource.org/licenses/MIT                            *
 *                                                                   *
 *********************************************************************
*/


#ifndef INCLUDE_BLOOM_FILTER_HPP
#define INCLUDE_BLOOM_FILTER_HPP

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
#include <vector>


static const std::size_t bits_per_char = 0x08;    // 8 bits in 1 char(unsigned)

static const unsigned char bit_mask[bits_per_char] = {
        0x01,  //00000001
        0x02,  //00000010
        0x04,  //00000100
        0x08,  //00001000
        0x10,  //00010000
        0x20,  //00100000
        0x40,  //01000000
        0x80   //10000000
};

class bloom_parameters
{
public:

    bloom_parameters()
            : minimum_size(1),
              maximum_size(std::numeric_limits<unsigned long long int>::max()),
              minimum_number_of_hashes(1),
              maximum_number_of_hashes(std::numeric_limits<unsigned int>::max()),
              projected_element_count(10000),
              false_positive_probability(1.0 / projected_element_count),
              random_seed(0xA5A5A5A55A5A5A5AULL)
    {}

    virtual ~bloom_parameters()
    {}

    inline bool operator!()
    {
        return (minimum_size > maximum_size)      ||
               (minimum_number_of_hashes > maximum_number_of_hashes) ||
               (minimum_number_of_hashes < 1)     ||
               (0 == maximum_number_of_hashes)    ||
               (0 == projected_element_count)     ||
               (false_positive_probability < 0.0) ||
               (std::numeric_limits<double>::infinity() == std::abs(false_positive_probability)) ||
               (0 == random_seed)                 ||
               (0xFFFFFFFFFFFFFFFFULL == random_seed);
    }

    // Allowable min/max size of the bloom filter in bits
    unsigned long long int minimum_size;
    unsigned long long int maximum_size;

    // Allowable min/max number of hash functions
    unsigned int minimum_number_of_hashes;
    unsigned int maximum_number_of_hashes;

    // The approximate number of elements to be inserted
    // into the bloom filter, should be within one order
    // of magnitude. The default is 10000.
    unsigned long long int projected_element_count;

    // The approximate false positive probability expected
    // from the bloom filter. The default is assumed to be
    // the reciprocal of the projected_element_count.
    double false_positive_probability;

    unsigned long long int random_seed;

    struct optimal_parameters_t
    {
        optimal_parameters_t()
                : number_of_hashes(0),
                  table_size(0)
        {}

        unsigned int number_of_hashes;
        unsigned long long int table_size;
    };

    optimal_parameters_t optimal_parameters;

    virtual bool compute_optimal_parameters()
    {
        /*
          Note:
          The following will attempt to find the number of hash functions
          and minimum amount of storage bits required to construct a bloom
          filter consistent with the user defined false positive probability
          and estimated element insertion count.
        */

        if (!(*this))
            return false;

        double min_m  = std::numeric_limits<double>::infinity();
        double min_k  = 0.0;
        double k      = 1.0;

        while (k < 1000.0)
        {
            const double numerator   = (- k * projected_element_count);
            const double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));

            const double curr_m = numerator / denominator;

            if (curr_m < min_m)
            {
                min_m = curr_m;
                min_k = k;
            }

            k += 1.0;
        }

        optimal_parameters_t& optp = optimal_parameters;

        optp.number_of_hashes = static_cast<unsigned int>(min_k);

        optp.table_size = static_cast<unsigned long long int>(min_m);

        optp.table_size += (((optp.table_size % bits_per_char) != 0) ? (bits_per_char - (optp.table_size % bits_per_char)) : 0);

        if (optp.number_of_hashes < minimum_number_of_hashes)
            optp.number_of_hashes = minimum_number_of_hashes;
        else if (optp.number_of_hashes > maximum_number_of_hashes)
            optp.number_of_hashes = maximum_number_of_hashes;

        if (optp.table_size < minimum_size)
            optp.table_size = minimum_size;
        else if (optp.table_size > maximum_size)
            optp.table_size = maximum_size;

        return true;
    }

};

class bloom_filter
{
protected:

    typedef unsigned int bloom_type;
    typedef unsigned char cell_type;
    typedef std::vector<unsigned char> table_type;

public:

    bloom_filter()
            : salt_count_(0),
              table_size_(0),
              projected_element_count_(0),
              inserted_element_count_ (0),
              random_seed_(0),
              desired_false_positive_probability_(0.0)
    {}

    bloom_filter(const bloom_parameters& p)
            : projected_element_count_(p.projected_element_count),
              inserted_element_count_(0),
              random_seed_((p.random_seed * 0xA5A5A5A5) + 1),
              desired_false_positive_probability_(p.false_positive_probability)
    {
        salt_count_ = p.optimal_parameters.number_of_hashes;
        table_size_ = p.optimal_parameters.table_size;

        generate_unique_salt();

        bit_table_.resize(table_size_ / bits_per_char, static_cast<unsigned char>(0x00));
    }

    bloom_filter(const bloom_filter& filter)
    {
        this->operator=(filter);
    }

    inline bool operator == (const bloom_filter& f) const
    {
        if (this != &f)
        {
            return
                    (salt_count_                         == f.salt_count_                        ) &&
                    (table_size_                         == f.table_size_                        ) &&
                    (bit_table_.size()                   == f.bit_table_.size()                  ) &&
                    (projected_element_count_            == f.projected_element_count_           ) &&
                    (inserted_element_count_             == f.inserted_element_count_            ) &&
                    (random_seed_                        == f.random_seed_                       ) &&
                    (desired_false_positive_probability_ == f.desired_false_positive_probability_) &&
                    (salt_                               == f.salt_                              ) &&
                    (bit_table_                          == f.bit_table_                         ) ;
        }
        else
            return true;
    }

    inline bool operator != (const bloom_filter& f) const
    {
        return !operator==(f);
    }

    inline bloom_filter& operator = (const bloom_filter& f)
    {
        if (this != &f)
        {
            salt_count_ = f.salt_count_;
            table_size_ = f.table_size_;
            bit_table_  = f.bit_table_;
            salt_       = f.salt_;

            projected_element_count_ = f.projected_element_count_;
            inserted_element_count_  = f.inserted_element_count_;

            random_seed_ = f.random_seed_;

            desired_false_positive_probability_ = f.desired_false_positive_probability_;
        }

        return *this;
    }

    virtual ~bloom_filter()
    {}

    inline bool operator!() const
    {
        return (0 == table_size_);
    }

    inline void clear()
    {
        std::fill(bit_table_.begin(), bit_table_.end(), static_cast<unsigned char>(0x00));
        inserted_element_count_ = 0;
    }

    inline void insert(const unsigned char* key_begin, const std::size_t& length)
    {
        std::size_t bit_index = 0;
        std::size_t bit       = 0;

        for (std::size_t i = 0; i < salt_.size(); ++i)
        {
            compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);

            bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
        }

        ++inserted_element_count_;
    }

    template <typename T>
    inline void insert(const T& t)
    {
        // Note: T must be a C++ POD type.
        insert(reinterpret_cast<const unsigned char*>(&t),sizeof(T));
    }

    inline void insert(const std::string& key)
    {
        insert(reinterpret_cast<const unsigned char*>(key.data()),key.size());
    }

    inline void insert(const char* data, const std::size_t& length)
    {
        insert(reinterpret_cast<const unsigned char*>(data),length);
    }

    template <typename InputIterator>
    inline void insert(const InputIterator begin, const InputIterator end)
    {
        InputIterator itr = begin;

        while (end != itr)
        {
            insert(*(itr++));
        }
    }

    inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const
    {
        std::size_t bit_index = 0;
        std::size_t bit       = 0;

        for (std::size_t i = 0; i < salt_.size(); ++i)
        {
            compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);

            if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
            {
                return false;
            }
        }

        return true;
    }

    template <typename T>
    inline bool contains(const T& t) const
    {
        return contains(reinterpret_cast<const unsigned char*>(&t),static_cast<std::size_t>(sizeof(T)));
    }

    inline bool contains(const std::string& key) const
    {
        return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
    }

    inline bool contains(const char* data, const std::size_t& length) const
    {
        return contains(reinterpret_cast<const unsigned char*>(data),length);
    }

    template <typename InputIterator>
    inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const
    {
        InputIterator itr = begin;

        while (end != itr)
        {
            if (!contains(*itr))
            {
                return itr;
            }

            ++itr;
        }

        return end;
    }

    template <typename InputIterator>
    inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const
    {
        InputIterator itr = begin;

        while (end != itr)
        {
            if (contains(*itr))
            {
                return itr;
            }

            ++itr;
        }

        return end;
    }

    inline virtual unsigned long long int size() const
    {
        return table_size_;
    }

    inline unsigned long long int element_count() const
    {
        return inserted_element_count_;
    }

    inline double effective_fpp() const
    {
        /*
          Note:
          The effective false positive probability is calculated using the
          designated table size and hash function count in conjunction with
          the current number of inserted elements - not the user defined
          predicated/expected number of inserted elements.
        */
        return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size());
    }

    inline bloom_filter& operator &= (const bloom_filter& f)
    {
        /* intersection */
        if (
                (salt_count_  == f.salt_count_ ) &&
                (table_size_  == f.table_size_ ) &&
                (random_seed_ == f.random_seed_)
                )
        {
            for (std::size_t i = 0; i < bit_table_.size(); ++i)
            {
                bit_table_[i] &= f.bit_table_[i];
            }
        }

        return *this;
    }

    inline bloom_filter& operator |= (const bloom_filter& f)
    {
        /* union */
        if (
                (salt_count_  == f.salt_count_ ) &&
                (table_size_  == f.table_size_ ) &&
                (random_seed_ == f.random_seed_)
                )
        {
            for (std::size_t i = 0; i < bit_table_.size(); ++i)
            {
                bit_table_[i] |= f.bit_table_[i];
            }
        }

        return *this;
    }

    inline bloom_filter& operator ^= (const bloom_filter& f)
    {
        /* difference */
        if (
                (salt_count_  == f.salt_count_ ) &&
                (table_size_  == f.table_size_ ) &&
                (random_seed_ == f.random_seed_)
                )
        {
            for (std::size_t i = 0; i < bit_table_.size(); ++i)
            {
                bit_table_[i] ^= f.bit_table_[i];
            }
        }

        return *this;
    }

    inline const cell_type* table() const
    {
        return bit_table_.data();
    }

    inline std::size_t hash_count()
    {
        return salt_.size();
    }

protected:

    inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
    {
        bit_index = hash % table_size_;
        bit       = bit_index % bits_per_char;
    }

    void generate_unique_salt()
    {
        /*
          Note:
          A distinct hash function need not be implementation-wise
          distinct. In the current implementation "seeding" a common
          hash function with different values seems to be adequate.
        */
        const unsigned int predef_salt_count = 128;

        static const bloom_type predef_salt[predef_salt_count] =
                {
                        0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
                        0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
                        0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
                        0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
                        0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
                        0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
                        0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
                        0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
                        0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
                        0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
                        0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
                        0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
                        0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
                        0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
                        0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
                        0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
                        0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
                        0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
                        0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
                        0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
                        0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
                        0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
                        0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
                        0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
                        0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
                        0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
                        0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
                        0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
                        0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
                        0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
                        0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
                        0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
                };

        if (salt_count_ <= predef_salt_count)
        {
            std::copy(predef_salt,
                      predef_salt + salt_count_,
                      std::back_inserter(salt_));

            for (std::size_t i = 0; i < salt_.size(); ++i)
            {
                /*
                   Note:
                   This is done to integrate the user defined random seed,
                   so as to allow for the generation of unique bloom filter
                   instances.
                */
                salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + static_cast<bloom_type>(random_seed_);
            }
        }
        else
        {
            std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(salt_));

            srand(static_cast<unsigned int>(random_seed_));

            while (salt_.size() < salt_count_)
            {
                bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand());

                if (0 == current_salt)
                    continue;

                if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
                {
                    salt_.push_back(current_salt);
                }
            }
        }
    }

    inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
    {
        const unsigned char* itr = begin;
        unsigned int loop        = 0;

        while (remaining_length >= 8)
        {
            const unsigned int& i1 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int);
            const unsigned int& i2 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int);

            hash ^= (hash <<  7) ^  i1 * (hash >> 3) ^
                    (~((hash << 11) + (i2 ^ (hash >> 5))));

            remaining_length -= 8;
        }

        if (remaining_length)
        {
            if (remaining_length >= 4)
            {
                const unsigned int& i = *(reinterpret_cast<const unsigned int*>(itr));

                if (loop & 0x01)
                    hash ^=    (hash <<  7) ^  i * (hash >> 3);
                else
                    hash ^= (~((hash << 11) + (i ^ (hash >> 5))));

                ++loop;

                remaining_length -= 4;

                itr += sizeof(unsigned int);
            }

            if (remaining_length >= 2)
            {
                const unsigned short& i = *(reinterpret_cast<const unsigned short*>(itr));

                if (loop & 0x01)
                    hash ^=    (hash <<  7) ^  i * (hash >> 3);
                else
                    hash ^= (~((hash << 11) + (i ^ (hash >> 5))));

                ++loop;

                remaining_length -= 2;

                itr += sizeof(unsigned short);
            }

            if (remaining_length)
            {
                hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
            }
        }

        return hash;
    }

    std::vector<bloom_type>    salt_;
    std::vector<unsigned char> bit_table_;
    unsigned int               salt_count_;
    unsigned long long int     table_size_;
    unsigned long long int     projected_element_count_;
    unsigned long long int     inserted_element_count_;
    unsigned long long int     random_seed_;
    double                     desired_false_positive_probability_;
};

inline bloom_filter operator & (const bloom_filter& a, const bloom_filter& b)
{
    bloom_filter result = a;
    result &= b;
    return result;
}

inline bloom_filter operator | (const bloom_filter& a, const bloom_filter& b)
{
    bloom_filter result = a;
    result |= b;
    return result;
}

inline bloom_filter operator ^ (const bloom_filter& a, const bloom_filter& b)
{
    bloom_filter result = a;
    result ^= b;
    return result;
}

class compressible_bloom_filter : public bloom_filter
{
public:

    compressible_bloom_filter(const bloom_parameters& p)
            : bloom_filter(p)
    {
        size_list.push_back(table_size_);
    }

    inline unsigned long long int size() const
    {
        return size_list.back();
    }

    inline bool compress(const double& percentage)
    {
        if (
                (percentage <    0.0) ||
                (percentage >= 100.0)
                )
        {
            return false;
        }

        unsigned long long int original_table_size = size_list.back();
        unsigned long long int new_table_size = static_cast<unsigned long long int>((size_list.back() * (1.0 - (percentage / 100.0))));

        new_table_size -= new_table_size % bits_per_char;

        if (
                (bits_per_char  >       new_table_size) ||
                (new_table_size >= original_table_size)
                )
        {
            return false;
        }

        desired_false_positive_probability_ = effective_fpp();

        const unsigned long long int new_tbl_raw_size = new_table_size / bits_per_char;

        table_type tmp(new_tbl_raw_size);

        std::copy(bit_table_.begin(), bit_table_.begin() + new_tbl_raw_size, tmp.begin());

        typedef table_type::iterator itr_t;

        itr_t itr     = bit_table_.begin() + (new_table_size      / bits_per_char);
        itr_t end     = bit_table_.begin() + (original_table_size / bits_per_char);
        itr_t itr_tmp = tmp.begin();

        while (end != itr)
        {
            *(itr_tmp++) |= (*itr++);
        }

        std::swap(bit_table_, tmp);

        size_list.push_back(new_table_size);

        return true;
    }

private:

    inline void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
    {
        bit_index = hash;

        for (std::size_t i = 0; i < size_list.size(); ++i)
        {
            bit_index %= size_list[i];
        }

        bit = bit_index % bits_per_char;
    }

    std::vector<unsigned long long int> size_list;
};

#endif


/*
  Note 1:
  If it can be guaranteed that bits_per_char will be of the form 2^n then
  the following optimization can be used:

  bit_table_[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)];

  Note 2:
  For performance reasons where possible when allocating memory it should
  be aligned (aligned_alloc) according to the architecture being used.
*/